# Main data frame (churn)
# Preview the first six rows of the churn data.
head(churn, n = 6L)
## # A tibble: 6 x 10
## ID START_DATE END_DATE LIFETIME RATING PHONE EMAIL
## <dbl> <dttm> <dttm> <dbl> <chr> <chr> <chr>
## 1 1 2014-01-06 23:00:00 2014-03-18 23:00:00 71 NONE N N
## 2 2 2013-07-05 00:00:00 2014-07-17 23:00:00 378 NONE N N
## 3 3 2015-06-04 00:00:00 2017-04-17 23:00:00 684 NONE Y Y
## 4 4 2014-01-08 23:00:00 2014-06-26 23:00:00 169 NONE N N
## 5 5 2014-07-23 00:00:00 2014-11-16 23:00:00 117 NONE N N
## 6 6 2017-07-21 00:00:00 2018-07-19 00:00:00 363 D Y Y
## # ... with 3 more variables: CONTRACT <dbl>, COMPLAINT <dbl>,
## # CHURNED <dbl>
# Column-wise overview of churn: one line per variable with its type.
churn %>%
  glimpse()
## Observations: 10,000
## Variables: 10
## $ ID <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ...
## $ START_DATE <dttm> 2014-01-06 23:00:00, 2013-07-05 00:00:00, 2015-06-...
## $ END_DATE <dttm> 2014-03-18 23:00:00, 2014-07-17 23:00:00, 2017-04-...
## $ LIFETIME <dbl> 71, 378, 684, 169, 117, 363, 571, 563, 772, 184, 39...
## $ RATING <chr> "NONE", "NONE", "NONE", "NONE", "NONE", "D", "NONE"...
## $ PHONE <chr> "N", "N", "Y", "N", "N", "Y", "N", "Y", "Y", "N", "...
## $ EMAIL <chr> "N", "N", "Y", "N", "N", "Y", "N", "Y", "Y", "N", "...
## $ CONTRACT <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, ...
## $ COMPLAINT <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ CHURNED <dbl> 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, ...
# Dygraphs tibble (timeseries)
# Preview the first six rows of the timeseries tibble.
head(timeseries, n = 6L)
## # A tibble: 6 x 3
## Date Male Female
## <dttm> <dbl> <dbl>
## 1 2013-01-01 00:00:00 300958 380666
## 2 2013-01-02 00:00:00 300522 379425
## 3 2013-01-03 00:00:00 300194 378252
## 4 2013-01-04 00:00:00 299399 377525
## 5 2013-01-05 00:00:00 298566 376250
## 6 2013-01-06 00:00:00 298790 376519
# Column-wise overview of timeseries: one line per variable with its type.
timeseries %>%
  glimpse()
## Observations: 315
## Variables: 3
## $ Date <dttm> 2013-01-01, 2013-01-02, 2013-01-03, 2013-01-04, 2013-0...
## $ Male <dbl> 300958, 300522, 300194, 299399, 298566, 298790, 298233,...
## $ Female <dbl> 380666, 379425, 378252, 377525, 376250, 376519, 375724,...
# Load the diamonds example data set into the workspace, then preview its
# first six rows.
data(list = "diamonds")
head(diamonds, n = 6L)
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.290 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Histogram of diamond prices using ggplot2's default binning.
ggplot(data = diamonds, mapping = aes(x = price)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Base-graphics histogram of diamond prices, using hist()'s default breaks.
hist(diamonds$price)

# Histogram of the natural log of price, using ggplot2's default binning.
ggplot(data = diamonds, mapping = aes(x = log(price))) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of price per carat, using ggplot2's default binning.
ggplot(data = diamonds) +
  geom_histogram(mapping = aes(x = price / carat))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Kernel density estimate of diamond prices drawn with ggplot2.
ggplot(data = diamonds) +
  geom_density(mapping = aes(x = price))

# Base-graphics kernel density estimate of price, using density()'s defaults.
plot(density(diamonds$price))

# Plot LIFETIME against row position in churn (single-vector plot()).
plot(churn$LIFETIME)

# Index plot of depth; points with depth below 50 or above 75 are drawn in
# red, all others in black.
plot(x = diamonds$depth,
     ylab = "Depth (%)",
     main = "Outliers",
     col = if_else(condition = diamonds$depth < 50 | diamonds$depth > 75,
                   true = "red",
                   false = "black"))

# Scatter plot of price against carat, coloured by cut.
ggplot(data = diamonds,
       mapping = aes(x = carat, y = price, colour = cut)) +
  geom_point()

# Same scatter plot with jittered, semi-transparent points to reduce
# overplotting.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_jitter(mapping = aes(colour = cut), alpha = 0.5)

# Box plot of price for each cut, black-and-white theme.
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
  theme_bw() +
  geom_boxplot()

# Box plot of carat for each cut.
ggplot(data = diamonds) +
  geom_boxplot(mapping = aes(x = cut, y = carat))

# Box plot of price by cut, zoomed to prices between 0 and 15,000.
# The zoom uses coord_cartesian() rather than scale limits:
# scale_y_continuous(limits = ...) turns out-of-range prices into NA before
# the box-plot statistics are computed (ggplot2 warns about the removed
# rows), which distorts the medians and quartiles. coord_cartesian() only
# crops the view, so every diamond still contributes to the statistics.
ggplot(diamonds,
       aes(cut, price)) +
  geom_boxplot(outlier.shape = NA,  # do not draw individual outlier points
               width = 0.25) +
  coord_cartesian(ylim = c(0, 15000)) +
  theme_bw()

# Horizontal violin plot of price for each cut, filled blue.
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
  coord_flip() +
  geom_violin(fill = "blue")

# Median price per cut, shown as horizontal bars ordered by median price.
diamonds %>%
  group_by(cut) %>%
  summarise(median_price = median(price, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = reorder(cut, median_price),
                       y = median_price)) +
  coord_flip() +
  geom_col(colour = "black",
           fill = "lavender",
           width = 0.75) +
  geom_hline(yintercept = 0) +  # baseline at zero
  labs(y = "Median Price",
       x = "Cut")

# Number of rows in churn per start month and rating, one line per rating.
# START_DATE is collapsed to its year-month and converted back to POSIXct so
# it can be used on a continuous time axis.
churn %>%
  transmute(START_MONTH = as.POSIXct(as.yearmon(START_DATE)),
            RATING) %>%
  group_by(START_MONTH, RATING) %>%
  summarise(total = n()) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = START_MONTH, y = total, colour = RATING)) +
  geom_line()

# Correlogram of the diamonds data.
# NOTE(review): diamonds contains ordered-factor columns (cut, color,
# clarity); confirm how corrgram treats non-numeric columns.
corrgram::corrgram(x = diamonds)

# Carat against price on a square-root price axis, coloured by cut.
# price is mapped untransformed and the y axis is square-root scaled, so the
# layout matches plotting sqrt(price) while the dollar labels show real
# prices (dollar-formatting sqrt(price) values mislabels the axis, and the
# quantity shown is a square root, not a square).
ggplot(diamonds,
       aes(x = carat,
           y = price)) +
  geom_jitter(aes(colour = cut), alpha = 0.5) +
  labs(title = "Linear Relationship between Carat and Square Root of Price",
       subtitle = "Source: diamonds dataset from the ggplot2 package",
       x = "Carat",
       y = "Price (square-root scale)") +
  scale_y_sqrt(labels = scales::dollar) +
  theme_minimal()

# Carat against price on a square-root price axis, one panel per cut, with a
# linear fit in each panel.
# price is mapped untransformed and the y axis is square-root scaled; scale
# transformations are applied before statistics, so geom_smooth() still fits
# sqrt(price) ~ carat, while the dollar labels show real prices
# (dollar-formatting sqrt(price) values mislabels the axis, and the quantity
# shown is a square root, not a square).
ggplot(diamonds,
       aes(x = carat,
           y = price)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Linear Relationship between Carat and Square Root of Price",
       subtitle = "Source: diamonds dataset from the ggplot2 package",
       x = "Carat",
       y = "Price (square-root scale)") +
  facet_wrap(~cut, scales = "free_y") +  # each panel gets its own y range
  scale_y_sqrt(labels = scales::dollar) +
  theme_minimal()
